""" __author__ = yuelaiyue2"""
import requests
from lxml import etree

from mongodb import get_connection, insert_data, close_client

# Shared handle passed to insert_data() by parse_details_info; created once,
# at import time.
# NOTE(review): presumably the MongoDB database named 'job_51' -- confirm
# against mongodb.get_connection.
db = get_connection('job_51')


def get_page(page):
    """Download one page of 51job search results for the keyword "python".

    Args:
        page: Page number substituted into the search URL.

    Returns:
        The response body decoded as GBK text, or None when the request
        fails, times out, or the server answers with a non-200 status.
    """
    url = 'https://search.51job.com/list/090200%252C020000%252C010000%252C030200%252C040000,000000,0000,00,9,99,python,2,{}.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='.format(page)
    print(page)

    # Send a browser-like User-Agent instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    }

    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Report the failure the same way as a non-200 status: return None.
        print('request for page {} failed: {}'.format(page, exc))
        return None

    if response.status_code != 200:
        return None

    # errors='replace' keeps a single malformed byte from aborting the page.
    return response.content.decode('gbk', errors='replace')


def parse_details_info(details):
    """Extract one job posting from a result row and insert it into the DB.

    Rows without a job-title link are ignored. Any other field that is
    missing from the row is stored as None instead of raising IndexError.

    Args:
        details: Element for a single result row; it is queried with
            relative XPath expressions.
    """

    def first_or_none(expression):
        # XPath queries return a list; take the first hit or None when empty.
        matches = details.xpath(expression)
        return matches[0] if matches else None

    job = first_or_none('./p/span/a/@title')
    if job is None:
        # Not a job row (no title link), so there is nothing to store.
        return

    data = {
        'job': job,
        'company': first_or_none('./span[@class="t2"]/a/@title'),
        'place': first_or_none('./span[@class="t3"]/text()'),
        'salary': first_or_none('./span[@class="t4"]/text()'),
        'time': first_or_none('./span[@class="t5"]/text()'),
    }

    insert_data(data, db, 'data_51_job')


def parse_page(html):
    """Parse one search-results page and store every job row found in it.

    Args:
        html: Page markup as text. None or an empty string (for example
            after a failed download) is accepted and ignored.
    """
    if not html:
        # get_page returns None for a failed request; nothing to parse.
        return

    document = etree.HTML(html)
    if document is None:
        # Defensive: the parser produced no root element for this input.
        return

    # Each result row is a <div> whose class attribute is exactly "el".
    for row in document.xpath('//div[@class="el"]'):
        parse_details_info(row)


def main():
    """Crawl result pages 1 through 450 and store the jobs found on each."""
    # NOTE(review): close_client is imported at the top of the file but never
    # called -- confirm whether the connection should be closed after the loop.
    for page in range(1, 451):
        html = get_page(page=page)
        if html is None:
            # get_page signals a failed download with None; skip this page
            # rather than letting one failure end the whole crawl.
            print('skipping page {}: no content'.format(page))
            continue

        parse_page(html)


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()















